library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(patchwork)

Load the Weather Data

weather_df = 
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"), 
    date_min = "2017-01-01",
    date_max = "2017-12-31") %>%
  mutate(
    name = recode(
      id, 
      USW00094728 = "CentralPark_NY", 
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"),
    tmin = tmin / 10,
    tmax = tmax / 10) %>%
  select(name, id, everything())
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2022-09-03 15:53:33 (8.394)
## file min/max dates: 1869-01-01 / 2022-09-30
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USC00519397.dly
## date created (size, mb): 2022-09-03 15:53:37 (1.697)
## file min/max dates: 1965-01-01 / 2020-02-29
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2022-09-03 15:53:39 (0.949)
## file min/max dates: 1999-09-01 / 2022-09-30
weather_df
## # A tibble: 1,095 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

Labels

Providing informative axis labels, plot titles, and captions, via labs(title = "", x = "", y = "", caption = "").

weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017"
  )
## Warning: Removed 15 rows containing missing values (geom_point).

Scales

We can set scales for x and y axis by using scale_x_continuous(). We can also label them by using labels = c() inside of scale_x_continuous().

weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017") + 
  scale_x_continuous(
    breaks = c(-15, 0, 15), 
    labels = c("-15ºC", "0", "15ºC")) +
  scale_y_continuous(
    trans = "sqrt",
    position = "right"
  )
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (geom_point).

What we did for the x, y-axis scales:

Color Scales

weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017") + 
  scale_color_hue(
    name = "Location",
    h = c(100, 300))
## Warning: Removed 15 rows containing missing values (geom_point).

Using name = "xxx" to rename the coloring variance, in this case we use (name = “Location”) to change our variable “name” to “Location”. Note: the variable “name” accidentally shared the same name with the (name = “xxx”) command.

The default hue is “h = c(1, 360)”

Viridis Package for Color Scheme

Trying to create your own color scheme usually doesn’t go well. You can use the “viridis package” instead. There are several options, but the default color scheme works nicely!

Note: viridis::scale_color_viridis() assume the variable you put in is a continuous variable. If we are applying it to a discrete variable (this case, location,) we want to write discrete = TRUE inside the scale_color_viridis command.

weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017") + 
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE)
## Warning: Removed 15 rows containing missing values (geom_point).

Themes

Themes are used to modify non-data elements of a plot. They don’t change mappings or how data are render, but control things like background color and location of the the legend. Using themes can help with general plot appearance.

For example, the default legent position is on the right of the graphic. If we want to move it to the bottom, we can use theme(legend.position = "bottom").

weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017") + 
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE) +
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

Or we can name the plot and add themes.

ggp_temp_plot =
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maxiumum daily temperature (C)",
    caption = "Data from the rnoaa package; temperatures in 2017") + 
  viridis::scale_color_viridis(
    name = "Location",
    discrete = TRUE)
ggp_temp_plot + 
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

Some other theme examples: White background with black box around.

ggp_temp_plot + 
  theme_bw()
## Warning: Removed 15 rows containing missing values (geom_point).

Just white background.

ggp_temp_plot + 
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

ggp_temp_plot + 
  theme_classic()
## Warning: Removed 15 rows containing missing values (geom_point).

In ggthemes there are a lot of themes.

ggp_temp_plot + 
  ggthemes::theme_economist_white()
## Warning: Removed 15 rows containing missing values (geom_point).

ggp_temp_plot + 
  ggthemes::theme_excel()
## Warning: Removed 15 rows containing missing values (geom_point).

ggp_temp_plot + 
  ggthemes::theme_excel_new()
## Warning: Removed 15 rows containing missing values (geom_point).

Setting Opotions

In addition to figure sizing, I include a few other figure preferences in global options declared at the outset of each .Rmd file (this code chunk just gets copy-and-pasted to the beginning of every new file).

library(tidyverse)

knitr::opts_chunk$set(
  fig.width = 6,
  fig.asp = .6,
  out.width = "90%"
)

theme_set(theme_minimal() + theme(legend.position = "bottom"))

options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

scale_colour_discrete = scale_colour_viridis_d
scale_fill_discrete = scale_fill_viridis_d

What we did in this code chunk:

Data Argument in geom

It’s sometimes necessary to overlay data summaries on a plot of the complete data. Depending on the setting, one way to do this is to create a “summary” dataframe and use that when adding a new geom to a ggplot based on the full data.

We can split “weather_df” into separate datasets for Central Park and Waikiki. Then we use one dataset in the ggplot() and use another in geom_line() inside of ggplot().

Note: we only changed the data set used in geom_line(), it still adopt the global aes() settings.

central_park = 
  weather_df %>% 
  filter(name == "CentralPark_NY")

waikiki = 
  weather_df %>% 
  filter(name == "Waikiki_HA")

ggplot(data = waikiki, aes(x = date, y = tmax, color = name)) + 
  geom_point() + 
  geom_line(data = central_park)
## Warning: Removed 3 rows containing missing values (geom_point).

patchwork

Remember faceting? We’ve seen facetting as an approach to create the “same type of plots” for several levels of a categorical variable.

weather_df %>%
  ggplot(aes(x = tmin, fill = name)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~name)

What happen when you want multiple plots but cannot facet…? Sometimes, we want to combine two or three fundamentally different plots in the same graphic, such as showing a scatterplot and a boxplot, or show scatterplots illustrating relationships between different variables. In this case, the solution is to create each of the panels you want separately and combine panels using tools in the patchwork package which is loaded in the beginning of this R_Markdown file.

tmax_tmin_p = 
  weather_df %>% 
  ggplot(aes(x = tmax, y = tmin, color = name)) + 
  geom_point(alpha = .5) +
  theme(legend.position = "none")

prcp_dens_p = 
  weather_df %>% 
  filter(prcp > 0) %>% 
  ggplot(aes(x = prcp, fill = name)) + 
  geom_density(alpha = .5) + 
  theme(legend.position = "none")

tmax_date_p = 
  weather_df %>% 
  ggplot(aes(x = date, y = tmax, color = name)) + 
  geom_point(alpha = .5) +
  geom_smooth(se = FALSE) + 
  theme(legend.position = "bottom")

(tmax_tmin_p + prcp_dens_p) / tmax_date_p
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'